High dimensional single-cell analysis reveals iNKT cell developmental trajectories and effector fate decision

In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
warnings.filterwarnings('ignore')

import logging
mpl_logger = logging.getLogger('matplotlib')
mpl_logger.setLevel(logging.WARNING) 

sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
scanpy==1.4.4.post1 anndata==0.6.22.post1 umap==0.3.10 numpy==1.17.3 scipy==1.4.1 pandas==0.25.3 scikit-learn==0.21.3 statsmodels==0.10.2 python-igraph==0.7.1 louvain==0.6.1
In [2]:
sc.settings.set_figure_params(dpi=130)
In [3]:
wt = sc.read_h5ad('./output/wt.preprocessing.h5ad')
wt.shape
Out[3]:
(3288, 12958)
In [4]:
#hvgs = adata_ko.var['highly_variable'].intersection(adata_wt.var['highly_variable'])
sc.pp.highly_variable_genes(wt, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pl.highly_variable_genes(wt)
np.sum(wt.var['highly_variable'])
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
Out[4]:
1804
In [5]:
#wt = wt[:, wt.var['highly_variable']]
In [6]:
#sc.pp.regress_out(wt, ['n_counts'])
In [7]:
sc.pp.scale(wt, max_value=10)
In [8]:
sc.tl.pca(wt, svd_solver='arpack')
computing PCA with n_comps = 50
computing PCA on highly variable genes
    finished (0:00:00)
In [9]:
sc.pl.pca_variance_ratio(wt, log=True)
In [10]:
sc.pp.neighbors(wt)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    'distances', distances for each pair of neighbors
    'connectivities', weighted adjacency matrix (0:00:02)
In [11]:
sc.tl.umap(wt)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:09)
In [12]:
wt.obs
Out[12]:
sample n_genes percent_mito percent_ribo n_counts doublet_scores predicted_doublets doubletDetection
index
AAACCTGAGATAGGAG-1 wt 3882 0.023835 0.282081 13080.0 0.063116 False 0.0
AAACCTGCAAACTGCT-1 wt 1532 0.027186 0.472325 2564.0 0.144465 False 0.0
AAACCTGCACAGAGGT-1 wt 1729 0.019697 0.418037 3464.0 0.063116 False 0.0
AAACCTGCACGAGGTA-1 wt 1472 0.021231 0.392310 2489.0 0.235632 False 0.0
AAACCTGCAGTCAGCC-1 wt 1205 0.035033 0.329171 1833.0 0.044776 False 0.0
... ... ... ... ... ... ... ... ...
TTTGTCAGTTGTCTTT-1 wt 1870 0.022153 0.345661 3458.0 0.172113 False 0.0
TTTGTCATCAACACTG-1 wt 1758 0.016772 0.395422 2985.0 0.066530 False 0.0
TTTGTCATCAACTCTT-1 wt 1601 0.017723 0.309541 2778.0 0.078522 False 0.0
TTTGTCATCATCGGAT-1 wt 1652 0.023945 0.372915 3080.0 0.189573 False 0.0
TTTGTCATCCCGGATG-1 wt 1626 0.024395 0.433154 2828.0 0.094040 False 0.0

3288 rows × 8 columns

In [13]:
sc.tl.louvain(wt, resolution=1.6)
sc.pl.umap(wt, color=['louvain','n_genes','n_counts'])
running Louvain clustering
    using the "louvain" package of Traag (2017)
    finished: found 13 clusters and added
    'louvain', the cluster labels (adata.obs, categorical) (0:00:00)
In [14]:
mean_cellType = np.empty((len(wt.obs['louvain'].unique()), wt.raw.shape[1]), 
                           dtype=float, order='C')
In [15]:
raw_adata = wt.raw.X.toarray()
In [16]:
for i in range(0, len(wt.obs['louvain'].unique())):
    #print(adata.obs['phenograph'].unique()[i])
    mean_cellType[i,:] = np.mean(raw_adata[wt.obs['louvain'] == wt.obs['louvain'].unique()[i], :], axis = 0)
In [17]:
mean_df = pd.DataFrame(np.corrcoef(mean_cellType), index = wt.obs['louvain'].unique(), columns = wt.obs['louvain'].unique())
In [18]:
import seaborn
ax = seaborn.clustermap(mean_df, cmap="jet").fig.suptitle('louvain') 
In [19]:
sc.tl.rank_genes_groups(wt, 'louvain', method='wilcoxon')
sc.pl.rank_genes_groups(wt, n_genes=25, sharey=False)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:04)
In [20]:
pd.DataFrame(wt.uns['rank_genes_groups']['names']).head(10)
Out[20]:
0 1 2 3 4 5 6 7 8 9 10 11 12
0 Klf2 Klrb1c Ctla2a Izumo1r Ramp1 Ifngr1 Eef1a1 Ifit3 Ltb Ranbp1 Emb Hmgb2 Lef1
1 S100a6 Klra9 Nkg7 Cmtm7 Plac8 Cd7 Gm10073 Isg15 Cxcr6 Srm Lmo4 Stmn1 Ldhb
2 Crip1 Klrd1 Bcl2 Smpdl3a Id3 Xcl1 Cmtm7 Stat1 Gimap4 Cyba Ramp1 Cks1b H3f3a
3 Fcer1g Id2 Klrb1c Ramp1 Tesc Ltb Cd3d Isg20 Thy1 Eif5a Pxdc1 H2afz Id3
4 Vim Cd7 Gimap3 Srgn Lmo4 Rgs1 Cd3g Ifit1 Ifngr1 Pfn1 Lgals3 Ptma Slamf6
5 Ly6c2 Ptpn6 Klrd1 Icos Slc25a5 AW112010 Ftl1 Irf7 Btg1 Il2rb Ckb Hmgn2 Stmn1
6 Tmsb4x Mbnl1 Gzmb Ptprcap Hsp90ab1 Rgs2 Cd3e Ms4a4b H2-K1 Eno1 Rexo2 Lockd Slc29a1
7 S100a4 Il2rb Fgl2 Cd28 Icos H2-D1 Gm8730 Rtp4 Gimap3 Nme1 Icos Birc5 Sox4
8 Ccl5 Nkg7 Thy1 Limd2 Slamf6 Fgl2 Ly6a Zbp1 Malat1 Ptma Fam83a 2810417H13Rik Nrgn
9 Emp3 Efhd2 Ms4a4b S100a10 Tcf7 Runx3 Morf4l1 Ifi27l2a Fyb Thy1 Sptssa Hmgb1 Atpif1
In [21]:
sc.pl.umap(wt, color=['Mki67', 'Top2a', 'Ube2c'], color_map="jet") # Cycling
In [22]:
sc.pl.umap(wt, color=['Lef1','Itm2a'], color_map="jet") # NKT0
In [23]:
sc.pl.umap(wt, color=['Plac8','Icos'], color_map="jet") # NKT2
In [24]:
sc.pl.umap(wt, color=['Tmem176a','Emb'], color_map="jet") # NKT17
In [25]:
sc.pl.umap(wt, color=['Klrb1c','Il2rb'], color_map="jet") # NKT1
In [26]:
sc.pl.umap(wt, color=['Ifit1','Ifit3','Isg15'], color_map="jet") # NKT1d
In [27]:
sc.pl.umap(wt, color=['Fhl2'], color_map="jet") # NKT2a
In [28]:
sc.pl.umap(wt, color=['louvain'], legend_loc = 'on data', legend_fontsize = 6)
In [29]:
marker_genes = ['Lef1','Itm2a','Mki67','Top2a','Zbtb16','Plac8','Izumo1r','Ifit1','Ifit3','Isg15','Ly6a','Fhl2','Nkg7','Fcer1g','Gzma','Gzmb','Rorc','Serpinb1a','Tmem176a','Tmem176b']
ax = sc.pl.dotplot(wt, marker_genes, groupby='louvain')
In [30]:
new_cluster_names = {
    '0':'NKT1c', '1':'NKT1b', '2':'NKT1b', '3':'NKT2b', '4':'NKT2a',
    '5':'NKT1b','6':'NKT1a', '7':'NKT1d', '8':'NKT1b', '9':'NKT1b',
    '10':'NKT17', '11':'Cycling NKT', '12':'NKT0'}
In [31]:
vect = []
for i in range(0, len(wt.obs['louvain'])):
    vect = vect + [new_cluster_names[str(wt.obs['louvain'][i])]]
    
wt.obs['cell_type'] = vect

# or
#adata.obs['louvain'].cat.categories
#adata.rename_categories('louvain', ['TA', 'EP (early)', 'Stem', 'Goblet', 'EP (stress)', 'Enterocyte', 'Paneth', 'Enteroendocrine', 'Tuft'])
#adata.obs['louvain'].value_counts()
In [32]:
sc.pl.umap(wt, color='cell_type', title='', frameon=False)
... storing 'cell_type' as categorical
In [33]:
wt.obs['cell_type'].cat.categories
wt.obs['cell_type'].cat.reorder_categories(['NKT0','Cycling NKT','NKT17','NKT2a','NKT2b','NKT1a','NKT1b','NKT1c','NKT1d'], inplace = True)
wt.uns['cell_type_colors'] = [ '#A31E22', # NKT0
                                '#F3766E', # Cycling NKT 
                                '#2da9d2', # NKT17
                                '#FEC85A', # NKT2a
                                '#FAE600', # NKT2b
                                '#50C878', # NKT1a
                                '#2E8B57', # NKT1b
                                '#0B6623', # NKT1c
                                '#4CBB17'] # NKT1d
In [34]:
sc.pl.umap(wt, color='cell_type', title='wt', frameon=False)
In [35]:
wt.obs.groupby(["cell_type"]).apply(len)
Out[35]:
cell_type
NKT0             40
Cycling NKT      86
NKT17            89
NKT2a           347
NKT2b           379
NKT1a           252
NKT1b          1388
NKT1c           460
NKT1d           247
dtype: int64
In [36]:
wt.shape
Out[36]:
(3288, 12958)

Gene markers

In [37]:
sc.tl.rank_genes_groups(wt, 'cell_type', method='wilcoxon', n_genes=200)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:03)
In [38]:
pd.DataFrame(wt.uns['rank_genes_groups']['names']).head(20)
Out[38]:
NKT0 Cycling NKT NKT17 NKT2a NKT2b NKT1a NKT1b NKT1c NKT1d
0 Lef1 Hmgb2 Emb Ramp1 Izumo1r Eef1a1 Nkg7 Klf2 Ifit3
1 Ldhb Stmn1 Lmo4 Plac8 Cmtm7 Gm10073 Klrb1c S100a6 Isg15
2 H3f3a Cks1b Ramp1 Id3 Smpdl3a Cmtm7 AW112010 Crip1 Stat1
3 Id3 H2afz Pxdc1 Tesc Ramp1 Cd3d Id2 Fcer1g Isg20
4 Slamf6 Ptma Lgals3 Lmo4 Srgn Cd3g Il2rb Vim Ifit1
5 Stmn1 Hmgn2 Ckb Slc25a5 Icos Ftl1 Klrd1 Ly6c2 Irf7
6 Slc29a1 Lockd Rexo2 Hsp90ab1 Ptprcap Cd3e Xcl1 Tmsb4x Ms4a4b
7 Sox4 Birc5 Icos Icos Cd28 Gm8730 Ltb S100a4 Rtp4
8 Nrgn 2810417H13Rik Fam83a Slamf6 Limd2 Ly6a Malat1 Ccl5 Zbp1
9 Atpif1 Hmgb1 Sptssa Tcf7 S100a10 Morf4l1 Ctla2a Emp3 Ifi27l2a
10 Tubb5 Ran Tcf7 Rexo2 Slamf6 Hspa8 Gimap3 Nkg7 Slfn1
11 Gm43352 Cdca8 Gpx1 Snhg6 Sh2d1a Bcl2a1b Fgl2 Klre1 Gm4955
12 Cd28 2700094K13Rik Il17re Pdlim1 Gdi2 Cst7 H2-K1 Klrk1 Bst2
13 Pdcd1 Cenpa Tmem176a Zbtb16 Tspan32 Izumo1r Mbnl1 Gzmb Shisa5
14 Marcksl1 Cenpw Serpinb1a Naca Tox Btg1 Gimap4 Klra9 Ifit3b
15 Hmgn1 Cdca3 Blk Snrpe Zbtb16 Ubl5 Cd7 Cd7 Xaf1
16 Eif4a1 Cks2 Sdc1 Igfbp4 Rap1a Itgae Sh3bgrl3 Gzma Ms4a6b
17 Slc25a5 Txn1 Tnfrsf25 Cox6c S100a11 Tma7 H2-D1 Lgals1 Ifi203
18 Ccr9 Ccnb2 S100a4 Lgals1 Rgs10 Eng Gimap6 Bcl2 H2-T22
19 Tox H2afv Sepp1 Pycard Nop10 Ccdc12 Ms4a4b Anxa2 Ly6a
In [39]:
cell_type_nb = {}
list_cell_type = wt.obs['cell_type'].unique().tolist()
list_cell_type.sort()
for i in range(0, len(wt.obs['cell_type'].unique().tolist())):
    cell_type_nb[list_cell_type[i]] = i

#inverted_cell_type_nb = dict([[v,k] for k,v in cell_type_nb.items()])
#inverted_cell_type_nb
In [40]:
clusters = []
genes = []
logFC = []
score = []
pvals = []
pvals_adj = []

for cl in cell_type_nb.keys():
    clusters = clusters + ([cl]*len(wt.uns['rank_genes_groups']['names'][str(cl)]))
    genes = genes + wt.uns['rank_genes_groups']['names'][str(cl)].tolist()
    logFC = logFC + wt.uns['rank_genes_groups']['logfoldchanges'][str(cl)].tolist()
    score = score + wt.uns['rank_genes_groups']['scores'][str(cl)].tolist()
    pvals = pvals + wt.uns['rank_genes_groups']['pvals'][str(cl)].tolist()
    pvals_adj = pvals_adj + wt.uns['rank_genes_groups']['pvals_adj'][str(cl)].tolist()
    
In [41]:
markers = pd.DataFrame(data = {'clusters': clusters,
                              'genes':genes,
                              'logFC':logFC,
                              'score':score,
                              'pvals':pvals,
                              'pvals_adj':pvals_adj,
                              })
In [42]:
markers.to_csv(path_or_buf = 'wt.markers.csv', sep = ',', index = False)
In [43]:
marker_genes = ['Lef1','Itm2a','Mki67','Top2a','Zbtb16','Plac8','Izumo1r','Ifit1','Ifit3','Isg15','Ly6a','Fhl2','Nkg7','Fcer1g','Gzma','Gzmb','Rorc','Serpinb1a','Tmem176a','Tmem176b']
ax = sc.pl.dotplot(wt, marker_genes, groupby='cell_type')

Transcription factor dotplot

In [44]:
marker_genes = ['Blk', 'Rorc', 'Tbx21', 'Tox', 'Zbtb16', 'Gata3', 'Sox4','Lef1']
ax = sc.pl.dotplot(wt, marker_genes, groupby='cell_type', save='fig.1h.pdf')

#sc.pl.matrixplot(wt, marker_genes, groupby='cell_type', dendrogram=True,
#                      use_raw=False, vmin=-3, vmax=3, cmap='bwr',  swap_axes=True, figsize=(5,6))
WARNING: saving figure to file figures/dotplotfig.1h.pdf
In [45]:
sc.pl.matrixplot(wt, marker_genes, groupby='cell_type', cmap='bwr')
Out[45]:
GridSpec(2, 3, height_ratios=[0, 10.5], width_ratios=[2.56, 0, 0.2])

Saving cellbrowser

In [46]:
wt.write('./output/wt.ann.h5ad')
In [47]:
wt.obs['umap_1'] = pd.DataFrame(wt.obsm['X_umap']).iloc[:,0].tolist()
wt.obs['umap_2'] = pd.DataFrame(wt.obsm['X_umap']).iloc[:,1].tolist()
wt.obs.to_csv(path_or_buf = 'output/metadata.wt.tsv', sep = '\t', index = True)

sc.external.exporting.cellbrowser(wt, 
                                 'cellbrowser/wt', 
                                 'wt', 
                                  annot_keys=['cell_type', 'louvain', 'sample', 
                                              'n_counts', 'n_genes', 'percent_mito', 'percent_ribo'], 
                                 cluster_field='cell_type', nb_marker=30,
                                 skip_matrix=False, do_debug=True)
WARNING:root:cellbrowser/wt/cellbrowser.conf already exists. Overwriting existing files.
INFO:root:Writing scanpy matrix (3288 cells, 12958 genes) to cellbrowser/wt/exprMatrix.tsv.gz
INFO:root:Transposing matrix
INFO:root:Writing gene-by-gene, without using pandas
INFO:root:Writing 12958 genes in total
INFO:root:Wrote 0 genes
INFO:root:Wrote 2000 genes
INFO:root:Wrote 4000 genes
INFO:root:Wrote 6000 genes
INFO:root:Wrote 8000 genes
INFO:root:Wrote 10000 genes
INFO:root:Wrote 12000 genes
DEBUG:root:Compressing cellbrowser/wt/exprMatrix.tsv.gz.tmp
DEBUG:root:Running gzip -f cellbrowser/wt/exprMatrix.tsv.gz.tmp
DEBUG:root:Renaming cellbrowser/wt/exprMatrix.tsv.gz.tmp.gz to cellbrowser/wt/exprMatrix.tsv.gz
DEBUG:root:Couldnt find coordinates for ForceAtlas2, tried keys fa, X_fa and X_draw_graph_fa
DEBUG:root:Couldnt find coordinates for Fruchterman Reingold, tried keys fr, X_fr and X_draw_graph_fr
DEBUG:root:Couldnt find coordinates for Grid Fruchterman Reingold, tried keys grid_fr, X_grid_fr and X_draw_graph_grid_fr
DEBUG:root:Couldnt find coordinates for Kamadi Kawai, tried keys kk, X_kk and X_draw_graph_kk
DEBUG:root:Couldnt find coordinates for Large Graph Layout, tried keys lgl, X_lgl and X_draw_graph_lgl
DEBUG:root:Couldnt find coordinates for DrL Distributed Recursive Layout, tried keys drl, X_drl and X_draw_graph_drl
DEBUG:root:Couldnt find coordinates for Reingold Tilford tree, tried keys rt, X_rt and X_draw_graph_rt
DEBUG:root:Couldnt find coordinates for t-SNE, tried keys tsne, X_tsne and X_draw_graph_tsne
INFO:root:Writing UMAP coords to cellbrowser/wt/umap_coords.tsv
DEBUG:root:Couldnt find coordinates for PAGA/ForceAtlas2, tried keys pagaFa, X_pagaFa and X_draw_graph_pagaFa
DEBUG:root:Couldnt find coordinates for PAGA/Fruchterman-Reingold, tried keys pagaFr, X_pagaFr and X_draw_graph_pagaFr
DEBUG:root:Couldnt find coordinates for PHATE, tried keys phate, X_phate and X_draw_graph_phate
INFO:root:Writing cellbrowser/wt/markers.tsv
DEBUG:root:getting meta field: cell_type -> cell_type
DEBUG:root:getting meta field: louvain -> Louvain Cluster
DEBUG:root:getting meta field: sample -> sample
DEBUG:root:getting meta field: n_counts -> UMI Count
DEBUG:root:getting meta field: n_genes -> Expressed Genes
DEBUG:root:getting meta field: percent_mito -> Percent Mitochond.
DEBUG:root:getting meta field: percent_ribo -> percent_ribo
INFO:root:Generating cellbrowser/wt/quickGenes.tsv from cellbrowser/wt/markers.tsv
DEBUG:root:Reading cluster markers from cellbrowser/wt/markers.tsv
DEBUG:root:Separator for cellbrowser/wt/markers.tsv is '\t'
INFO:root:Reading cellbrowser/wt/markers.tsv: assuming marker file format (cluster, gene, score) + any other fields
DEBUG:root:Other headers: []
DEBUG:root:Other headers with type: []
DEBUG:root:score field has name 'z_score'
INFO:root:cellbrowser/wt/cellbrowser.conf already exists, not overwriting. Remove and re-run command to recreate.

Remove Cycling and NKT1d

In [48]:
wt = sc.read_h5ad('./output/wt.ann.h5ad')
cell_bool = []
for x in wt.obs['cell_type']:
    cell_bool = cell_bool + [x in ['NKT0','NKT17','NKT2a','NKT2b','NKT1a','NKT1b','NKT1c']]
In [49]:
list_of_cell_names = wt.obs.loc[cell_bool, :].index.tolist()
wt = wt[list_of_cell_names, ]
wt.shape
Out[49]:
(2955, 12958)

Gatting dotplot

In [50]:
marker_genes = ['Sdc1', 'Klrb1c', 'Ly6a', 'Izumo1r','Cd24a']
ax = sc.pl.dotplot(wt, marker_genes, groupby='cell_type', save='fig.5a.pdf')
WARNING: saving figure to file figures/dotplotfig.5a.pdf

thymus egress dotplot

In [51]:
wt = sc.read_h5ad('./output/wt.ann.h5ad')
cell_bool = []
for x in wt.obs['cell_type']:
    cell_bool = cell_bool + [x in ['NKT0','NKT17','NKT2a','NKT2b','NKT1a','NKT1b','NKT1c']]
In [52]:
list_of_cell_names = wt.obs.loc[cell_bool, :].index.tolist()
wt = wt[list_of_cell_names, ]
wt.shape
Out[52]:
(2955, 12958)
In [53]:
marker_genes = ['S1pr1', 'Klf2', 'Emp3', 'S100a4', 'S100a6', 'Cd69', 'Cxcr3']
ax = sc.pl.dotplot(wt, marker_genes, groupby='cell_type', save='fig.6a.pdf')
WARNING: saving figure to file figures/dotplotfig.6a.pdf

Correlation heatmap

In [54]:
mean_cellType = np.empty((len(wt.obs['cell_type'].unique()), wt.raw.shape[1]), 
                           dtype=float, order='C')
In [55]:
raw_adata = wt.raw.X.toarray()
In [56]:
for i in range(0, len(wt.obs['cell_type'].unique())):
    #print(adata.obs['phenograph'].unique()[i])
    mean_cellType[i,:] = np.mean(raw_adata[wt.obs['cell_type'] == wt.obs['cell_type'].unique()[i], :], axis = 0)
In [57]:
mean_df = pd.DataFrame(np.corrcoef(mean_cellType), index = wt.obs['cell_type'].unique(), columns = wt.obs['cell_type'].unique())
In [58]:
ax = sns.clustermap(mean_df, cmap="bwr")
ax.savefig("figure.6c.pdf")

NKT2a versus NKT2b differential expression

In [59]:
wt = sc.read_h5ad('./output/wt.ann.h5ad')
cell_bool = []
for x in wt.obs['cell_type']:
    cell_bool = cell_bool + [x in ['NKT2a','NKT2b']]
In [60]:
list_of_cell_names = wt.obs.loc[cell_bool, :].index.tolist()
wt = wt[list_of_cell_names, ]
wt.shape
Out[60]:
(726, 12958)
In [61]:
sc.tl.rank_genes_groups(wt, groupby='cell_type', key_added='group_DE_results', n_genes=200)
ranking genes
    finished: added to `.uns['group_DE_results']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:00)
In [62]:
cell_type_nb = {}
list_cell_type = wt.obs['cell_type'].unique().tolist()
list_cell_type.sort()
for i in range(0, len(wt.obs['cell_type'].unique().tolist())):
    cell_type_nb[list_cell_type[i]] = i

#inverted_cell_type_nb = dict([[v,k] for k,v in cell_type_nb.items()])
#inverted_cell_type_nb
In [63]:
clusters = []
genes = []
logFC = []
score = []
pvals = []
pvals_adj = []

for cl in cell_type_nb.keys():
    clusters = clusters + ([cl]*len(wt.uns['group_DE_results']['names'][str(cl)]))
    genes = genes + wt.uns['group_DE_results']['names'][str(cl)].tolist()
    logFC = logFC + wt.uns['group_DE_results']['logfoldchanges'][str(cl)].tolist()
    score = score + wt.uns['group_DE_results']['scores'][str(cl)].tolist()
    pvals = pvals + wt.uns['group_DE_results']['pvals'][str(cl)].tolist()
    pvals_adj = pvals_adj + wt.uns['group_DE_results']['pvals_adj'][str(cl)].tolist()
    
In [64]:
markers = pd.DataFrame(data = {'clusters': clusters,
                              'genes':genes,
                              'logFC':logFC,
                              'score':score,
                              'pvals':pvals,
                              'pvals_adj':pvals_adj,
                              })
In [65]:
markers.to_csv(path_or_buf = 'nkt2.markers.csv', sep = ',', index = False)
In [66]:
sc.pl.rank_genes_groups_violin(wt, use_raw=True, key='group_DE_results', groups=["NKT2a"], gene_names=wt.uns['group_DE_results']['names']["NKT2a"][1:10])
sc.pl.rank_genes_groups_violin(wt, use_raw=True, key='group_DE_results', groups=["NKT2a"], gene_names=wt.uns['group_DE_results']['names']["NKT2a"][90:100])
In [67]:
sc.pl.rank_genes_groups_violin(wt, use_raw=True, key='group_DE_results', groups=["NKT2b"], gene_names=wt.uns['group_DE_results']['names']["NKT2b"][1:10])
sc.pl.rank_genes_groups_violin(wt, use_raw=True, key='group_DE_results', groups=["NKT2b"], gene_names=wt.uns['group_DE_results']['names']["NKT2b"][90:100])
In [68]:
nkt2_genes = ['Tesc','Lgals1','S100a6','Vim','Thy1','Crip1','Lmo4','Pdlim1','Rgcc','Txn1','Stmn1','Emp3','Arl4c','Cxcr6','Igfbp4','Rora','Lgals3','H2afz','Cfl1','Cox6c','Ccr9','Dbi','Prdx1','Hmgn2','Dut','Ldha','Emb','Gpx1','Ppia','Izumo1r','Tpt1','Hcst','Ctsw','Malat1','Ctsd','Btg1','Nkg7','Klrb1c','Cmtm7','Maf','S100a11','H2-D1','H2-K1','H2-Q7','Eef1a1','AW112010','Ypel3','Cd53','Gimap4','Smpdl3a','Fxyd5','Serinc3','Srgn','Gimap5','Ptprcap','Cxcr3','Rsrp1','Pfdn5']
sc.pl.matrixplot(wt, nkt2_genes, groupby='cell_type', save='figure.1sup.pdf')
WARNING: saving figure to file figures/matrixplotfigure.1sup.pdf
Out[68]:
GridSpec(2, 3, height_ratios=[0, 10.5], width_ratios=[18.56, 0, 0.2])
In [69]:
ax = sc.pl.dotplot(wt, nkt2_genes, groupby='cell_type', save='figure.1sup-2.pdf')
WARNING: saving figure to file figures/dotplotfigure.1sup-2.pdf
In [70]:
result = wt.uns['group_DE_results']
groups = result['names'].dtype.names
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names', 'logfoldchanges', 'scores', 'pvals']}).head(5)
Out[70]:
NKT2a_n NKT2a_l NKT2a_s NKT2a_p NKT2b_n NKT2b_l NKT2b_s NKT2b_p
0 Tesc 3.005036 17.589895 2.431747e-56 Nkg7 2.646115 17.654684 3.629849e-58
1 S100a6 2.952302 16.955870 6.113323e-54 Klrb1c 3.556265 16.671654 6.520522e-50
2 Vim 2.373764 14.495781 1.088113e-41 Tpt1 0.381937 14.068198 4.252395e-40
3 Pdlim1 3.456881 14.474585 6.931359e-40 Hcst 1.240436 13.536692 1.605532e-37
4 Lgals1 2.104972 14.403680 2.545626e-41 AW112010 1.432412 12.938231 1.140341e-34
In [71]:
sc.pl.umap(wt, color=['Rora','Cxcr6','Izumo1r',"Nkg7"], color_map="jet") # NKT1

Correlation

In [72]:
wt = sc.read_h5ad('./output/wt.ann.h5ad')
cell_bool = []
for x in wt.obs['cell_type']:
    cell_bool = cell_bool + [x in ['NKT0','NKT17','NKT2a','NKT2b','NKT1a','NKT1b','NKT1c']]
In [73]:
list_of_cell_names = wt.obs.loc[cell_bool, :].index.tolist()
wt = wt[list_of_cell_names, ]
wt.shape
Out[73]:
(2955, 12958)
In [74]:
mean_cellType = np.empty((len(wt.obs['cell_type'].unique()), wt.raw.shape[1]), 
                           dtype=float, order='C')
In [75]:
raw_adata = wt.raw.X.toarray()
In [76]:
for i in range(0, len(wt.obs['cell_type'].unique())):
    #print(adata.obs['phenograph'].unique()[i])
    mean_cellType[i,:] = np.mean(raw_adata[wt.obs['cell_type'] == wt.obs['cell_type'].unique()[i], :], axis = 0)
In [77]:
mean_df = pd.DataFrame(np.corrcoef(mean_cellType), index = wt.obs['cell_type'].unique(), columns = wt.obs['cell_type'].unique())
In [78]:
import seaborn
seaborn.palplot(sns.color_palette("Blues"))
ax = seaborn.clustermap(mean_df, cmap="Blues")
ax.savefig("figure.2a.pdf")